started: Alexey Larionov, 30Jan2017
last updated: Alexey Larionov, 30Jan2017
# Time stamp
Sys.time()
## [1] "2017-01-30 20:05:00 GMT"
# Folders
setwd("/scratch/medgen/scripts/wecare_stat_01.17/scripts")
interim_data_folder <- "/scratch/medgen/scripts/wecare_stat_01.17/interim_data"
# Required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# --- r-calculated data --- #
load(paste(interim_data_folder, "r05_calculate_egenvectors_wecare_only_jan2017.RData", sep="/"))
rm(wecare_exac.df, wecare_genotypes.mx, wecare_kgen.df, wecare_nfe_exac.df, wecare_nfe_genotypes.mx, wecare_nfe_kgen.df, wecare_nfe_phenotypes.df, wecare_nfe_variants.df, wecare_phenotypes.df, wecare_variants.df)
# eigenvectors
r_evectors.df <- as.data.frame(wecare_eigen$vectors[,1:10])
colnames(r_evectors.df) <- c("r_ev1", "r_ev2", "r_ev3", "r_ev4", "r_ev5", "r_ev6", "r_ev7", "r_ev8", "r_ev9", "r_ev10")
str(r_evectors.df)
## 'data.frame': 480 obs. of 10 variables:
## $ r_ev1 : num 0.00878 0.00978 -0.0044 -0.00765 -0.00231 ...
## $ r_ev2 : num -0.05413 -0.02963 0.00248 0.00195 0.00781 ...
## $ r_ev3 : num 0.010002 0.009256 0.000475 0.006905 0.00144 ...
## $ r_ev4 : num 0.02115 0.00508 0.00383 0.00149 -0.00243 ...
## $ r_ev5 : num -0.184168 -0.092435 0.000648 0.003585 0.016433 ...
## $ r_ev6 : num 0.024713 0.026999 0.001998 0.003521 0.000813 ...
## $ r_ev7 : num -0.02331 -0.01648 0.00636 0.00427 0.01293 ...
## $ r_ev8 : num -4.52e-02 3.42e-03 6.41e-03 -9.41e-04 9.08e-06 ...
## $ r_ev9 : num 0.000288 -0.002909 -0.010841 0.008353 0.008717 ...
## $ r_ev10: num 0.02773 -0.0363 0.00986 0.00429 0.00511 ...
# Common sence checks (using properties of eigenvectors)
apply(r_evectors.df,2,sum) # should be close to zeros
## r_ev1 r_ev2 r_ev3 r_ev4 r_ev5
## -1.398621e-16 -1.932672e-15 3.466357e-15 -1.507583e-16 8.426094e-15
## r_ev6 r_ev7 r_ev8 r_ev9 r_ev10
## -3.342287e-15 1.714937e-16 1.890185e-15 1.548837e-15 -2.406604e-15
apply(r_evectors.df,2,function(x){sum(x^2)}) # should be close to ones
## r_ev1 r_ev2 r_ev3 r_ev4 r_ev5 r_ev6 r_ev7 r_ev8 r_ev9 r_ev10
## 1 1 1 1 1 1 1 1 1 1
# eigenvalues
r_evalues.df <- as.data.frame(wecare_eigen$values)
colnames(r_evalues.df) <- "evals"
plot(r_evalues.df$evals)
# --- es-calculated data --- #
es_evectors_file="/scratch/medgen/scripts/wecare_stat_01.17/interim_data/u01_wecare_only_480_226k_eigenstrat_no_outliers/u01_wecare_only_480_226k_eigenstrat_no_outliers.evec"
es_evectors.df <- read.table(es_evectors_file)
colnames(es_evectors.df) <- c("wes_id", "es_ev1", "es_ev2", "es_ev3", "es_ev4", "es_ev5", "es_ev6", "es_ev7", "es_ev8", "es_ev9", "es_ev10", "group")
str(es_evectors.df)
## 'data.frame': 480 obs. of 12 variables:
## $ wes_id : Factor w/ 480 levels "P1_A01","P1_A02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ es_ev1 : num 0.0157 0.0146 -0.0049 -0.0081 -0.002 -0.0064 -0.0087 -0.011 0.0283 -0.0077 ...
## $ es_ev2 : num -0.0558 -0.0315 0.0022 0.0014 0.0078 0.0066 0.0037 0.0073 -0.0578 0.0053 ...
## $ es_ev3 : num -0.0085 -0.0083 0.0002 -0.0072 -0.0017 -0.0026 -0.0034 -0.0008 -0.006 -0.0066 ...
## $ es_ev4 : num -0.0376 -0.0142 -0.0037 -0.0017 0.0035 0.0047 0.0029 0.0045 -0.0273 0.003 ...
## $ es_ev5 : num -0.1792 -0.0915 0.0005 0.0035 0.0154 ...
## $ es_ev6 : num -0.0214 -0.024 -0.0018 -0.004 -0.0011 0.0056 0.0021 -0.0024 -0.0151 -0.0041 ...
## $ es_ev7 : num 0.0058 0.0172 -0.0024 -0.0043 -0.0132 0.0068 0.0044 -0.0011 0.0261 -0.0072 ...
## $ es_ev8 : num -0.0533 -0.0026 0.0073 0.0009 0.0053 0.0028 -0.0051 -0.0028 -0.028 0.0031 ...
## $ es_ev9 : num -0.0038 0.0026 0.0117 -0.0094 -0.0083 -0.0074 0.0059 -0.006 -0.0276 -0.0017 ...
## $ es_ev10: num 0.0778 -0.0383 0.0068 -0.0006 0.0075 0 0.0016 -0.0029 0.0561 -0.0015 ...
## $ group : Factor w/ 2 levels "CBC","UBC": 1 1 1 1 1 1 2 1 1 2 ...
# Common sence checks (using properties of eigenvectors)
apply(es_evectors.df[,2:11],2,sum) # should be close to zeros
## es_ev1 es_ev2 es_ev3 es_ev4 es_ev5 es_ev6 es_ev7 es_ev8 es_ev9
## -0.0001 0.0003 0.0001 -0.0003 -0.0001 0.0010 -0.0004 -0.0008 0.0015
## es_ev10
## 0.0002
apply(es_evectors.df[,2:11],2,function(x){sum(x^2)}) # should be close to ones
## es_ev1 es_ev2 es_ev3 es_ev4 es_ev5 es_ev6 es_ev7
## 1.0000400 1.0000222 1.0000036 0.9999435 1.0000308 0.9999554 1.0000354
## es_ev8 es_ev9 es_ev10
## 0.9998510 1.0000525 1.0000621
es_evalues_file <- "/scratch/medgen/scripts/wecare_stat_01.17/interim_data/u01_wecare_only_480_226k_eigenstrat_no_outliers/u01_wecare_only_480_226k_eigenstrat_no_outliers.eval"
es_evalues.df <- read.table(es_evalues_file)
colnames(es_evalues.df) <- "evals"
str(es_evalues.df)
## 'data.frame': 480 obs. of 1 variable:
## $ evals: num 2.78 2.41 2.3 2.06 2.03 ...
plot(es_evalues.df$evals)
rm(es_evectors_file, es_evalues_file)
plot(es_evalues.df$evals, r_evalues.df$evals, main="eigenvalues: R vs EIGENSTRAT")
abline(c(0,0),c(1,1), col="red")
plot(es_evectors.df$es_ev1, r_evectors.df$r_ev1 , main="1st eigenvectors: R vs EIGENSTRAT")
abline(c(0,0),c(1,1), col="red")
# Prepare dataframe
joined_evectors.df <- cbind(es_evectors.df, r_evectors.df)
# Prepare colour scale
colours <- c("UBC" = "BLUE", "CBC" = "RED")
userColourScale <- scale_colour_manual(values=colours)
# --- 1st eigenvectors --- #
# Non-interactive plot
ggplot(joined_evectors.df, aes(es_ev1, r_ev1)) +
geom_point(aes(colour=group, fill=group)) +
labs(title="1st eigenvectors: R vs EIGENSTRAT") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
# Interactive plot
g <- ggplot(joined_evectors.df, aes(es_ev1, r_ev1)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="1st eigenvectors: R vs EIGENSTRAT") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
# --- 2nd eigenvectors --- #
# Non-interactive plot
ggplot(joined_evectors.df, aes(es_ev2, r_ev2)) +
geom_point(aes(colour=group, fill=group)) +
labs(title="2nd eigenvectors: R vs EIGENSTRAT") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
# Interactive plot
g <- ggplot(joined_evectors.df, aes(es_ev2, r_ev2)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="2nd eigenvectors: R vs EIGENSTRAT") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
# --- 3rd eigenvectors --- #
# Non-interactive plot
ggplot(joined_evectors.df, aes(es_ev3, r_ev3)) +
geom_point(aes(colour=group, fill=group)) +
labs(title="3rd eigenvectors: R vs EIGENSTRAT") +
geom_abline(slope=-1, colour = "brown", linetype = 2) +
userColourScale
# Interactive plot
g <- ggplot(joined_evectors.df, aes(es_ev3, r_ev3)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="3rd eigenvectors: R vs EIGENSTRAT") +
geom_abline(slope = -1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
rm(g)
save.image(paste(interim_data_folder, "r05_compare_egenvectors_wecare_only_jan2017.RData", sep="/"))
ls()
## [1] "colours" "es_evalues.df" "es_evectors.df"
## [4] "interim_data_folder" "joined_evectors.df" "r_evalues.df"
## [7] "r_evectors.df" "userColourScale" "wecare_eigen"
sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Scientific Linux release 6.8 (Carbon)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.5.6 ggplot2_2.2.1 dplyr_0.5.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.6 knitr_1.13 magrittr_1.5
## [4] munsell_0.4.3 viridisLite_0.1.3 colorspace_1.2-6
## [7] R6_2.1.2 httr_1.2.1 stringr_1.0.0
## [10] plyr_1.8.4 tools_3.2.3 grid_3.2.3
## [13] gtable_0.2.0 DBI_0.5 htmltools_0.3.5
## [16] yaml_2.1.13 lazyeval_0.2.0 assertthat_0.1
## [19] digest_0.6.10 tibble_1.1 tidyr_0.5.1
## [22] purrr_0.2.2 formatR_1.4 base64enc_0.1-3
## [25] htmlwidgets_0.8 evaluate_0.9 rmarkdown_1.0
## [28] labeling_0.3 stringi_1.1.1 scales_0.4.1
## [31] jsonlite_1.0
Sys.time()
## [1] "2017-01-30 20:05:13 GMT"